package com.simoniu.sparkdemo.javademo.sql;

import org.apache.spark.SparkConf;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

/**
 * Demo: querying a DataFrame with SQL.
 *
 * <p>Loads a JSON file into a {@code Dataset<Row>}, registers it as the temporary view
 * {@code student}, and shows the row count for each {@code gender} value.
 *
 * Created by simoniu
 */
public class DataFrameSqlJavaDemo {

    /** Input file used when no path is passed on the command line. */
    private static final String DEFAULT_STUDENTS_JSON = "D:\\uploadFiles\\students.json";

    /**
     * Runs the demo.
     *
     * @param args optional; {@code args[0]} is the path of the students JSON file. When it is
     *             absent or empty, {@link #DEFAULT_STUDENTS_JSON} is used.
     */
    public static void main(String[] args) {
        String studentsJsonPath = (args != null && args.length > 0 && !args[0].isEmpty())
                ? args[0]
                : DEFAULT_STUDENTS_JSON;

        SparkConf conf = new SparkConf();
        // Default to a local master, but do not override one already supplied by the
        // launcher (for example through spark-submit --master).
        conf.setIfMissing("spark.master", "local");

        // Create the SparkSession; it wraps both the SparkContext and the SQLContext.
        SparkSession sparkSession = SparkSession.builder()
                .appName("DataFrameSqlJava")
                .config(conf)
                .getOrCreate();

        try {
            Dataset<Row> stuDf = sparkSession.read().json(studentsJsonPath);
            // Register the Dataset<Row> as a temporary view so it can be queried by name.
            stuDf.createOrReplaceTempView("student");
            // Query the temporary view with SQL and display the result.
            sparkSession.sql("select gender,count(*) as num from student group by gender")
                    .show();
        } finally {
            // Always stop the session, even when reading or querying fails.
            sparkSession.stop();
        }
    }
}
